1 /* Extended regular expression matching and search library.
2 Copyright (C) 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see
18 <http://www.gnu.org/licenses/>. */
20 static void re_string_construct_common (const char *str
, int len
,
22 __RE_TRANSLATE_TYPE trans
, int icase
,
23 const re_dfa_t
*dfa
) internal_function
;
24 static re_dfastate_t
*create_ci_newstate (const re_dfa_t
*dfa
,
25 const re_node_set
*nodes
,
26 unsigned int hash
) internal_function
;
27 static re_dfastate_t
*create_cd_newstate (const re_dfa_t
*dfa
,
28 const re_node_set
*nodes
,
30 unsigned int hash
) internal_function
;
32 /* Functions for string operation. */
34 /* This function allocate the buffers. It is necessary to call
35 re_string_reconstruct before using the object. */
39 re_string_allocate (re_string_t
*pstr
, const char *str
, int len
, int init_len
,
40 __RE_TRANSLATE_TYPE trans
, int icase
, const re_dfa_t
*dfa
)
45 /* Ensure at least one character fits into the buffers. */
46 if (init_len
< dfa
->mb_cur_max
)
47 init_len
= dfa
->mb_cur_max
;
48 init_buf_len
= (len
+ 1 < init_len
) ? len
+ 1: init_len
;
49 re_string_construct_common (str
, len
, pstr
, trans
, icase
, dfa
);
51 ret
= re_string_realloc_buffers (pstr
, init_buf_len
);
52 if (BE (ret
!= REG_NOERROR
, 0))
55 pstr
->word_char
= dfa
->word_char
;
56 pstr
->word_ops_used
= dfa
->word_ops_used
;
57 pstr
->mbs
= pstr
->mbs_allocated
? pstr
->mbs
: (unsigned char *) str
;
58 pstr
->valid_len
= (pstr
->mbs_allocated
|| dfa
->mb_cur_max
> 1) ? 0 : len
;
59 pstr
->valid_raw_len
= pstr
->valid_len
;
63 /* This function allocate the buffers, and initialize them. */
67 re_string_construct (re_string_t
*pstr
, const char *str
, int len
,
68 __RE_TRANSLATE_TYPE trans
, int icase
, const re_dfa_t
*dfa
)
71 memset (pstr
, '\0', sizeof (re_string_t
));
72 re_string_construct_common (str
, len
, pstr
, trans
, icase
, dfa
);
76 ret
= re_string_realloc_buffers (pstr
, len
+ 1);
77 if (BE (ret
!= REG_NOERROR
, 0))
80 pstr
->mbs
= pstr
->mbs_allocated
? pstr
->mbs
: (unsigned char *) str
;
85 if (dfa
->mb_cur_max
> 1)
89 ret
= build_wcs_upper_buffer (pstr
);
90 if (BE (ret
!= REG_NOERROR
, 0))
92 if (pstr
->valid_raw_len
>= len
)
94 if (pstr
->bufs_len
> pstr
->valid_len
+ dfa
->mb_cur_max
)
96 ret
= re_string_realloc_buffers (pstr
, pstr
->bufs_len
* 2);
97 if (BE (ret
!= REG_NOERROR
, 0))
102 #endif /* RE_ENABLE_I18N */
103 build_upper_buffer (pstr
);
107 #ifdef RE_ENABLE_I18N
108 if (dfa
->mb_cur_max
> 1)
109 build_wcs_buffer (pstr
);
114 re_string_translate_buffer (pstr
);
117 pstr
->valid_len
= pstr
->bufs_len
;
118 pstr
->valid_raw_len
= pstr
->bufs_len
;
126 /* Helper functions for re_string_allocate, and re_string_construct. */
130 re_string_realloc_buffers (re_string_t
*pstr
, int new_buf_len
)
132 #ifdef RE_ENABLE_I18N
133 if (pstr
->mb_cur_max
> 1)
135 wint_t *new_wcs
= re_realloc (pstr
->wcs
, wint_t, new_buf_len
);
136 if (BE (new_wcs
== NULL
, 0))
139 if (pstr
->offsets
!= NULL
)
141 int *new_offsets
= re_realloc (pstr
->offsets
, int, new_buf_len
);
142 if (BE (new_offsets
== NULL
, 0))
144 pstr
->offsets
= new_offsets
;
147 #endif /* RE_ENABLE_I18N */
148 if (pstr
->mbs_allocated
)
150 unsigned char *new_mbs
= re_realloc (pstr
->mbs
, unsigned char,
152 if (BE (new_mbs
== NULL
, 0))
156 pstr
->bufs_len
= new_buf_len
;
163 re_string_construct_common (const char *str
, int len
, re_string_t
*pstr
,
164 __RE_TRANSLATE_TYPE trans
, int icase
,
167 pstr
->raw_mbs
= (const unsigned char *) str
;
171 pstr
->icase
= icase
? 1 : 0;
172 pstr
->mbs_allocated
= (trans
!= NULL
|| icase
);
173 pstr
->mb_cur_max
= dfa
->mb_cur_max
;
174 pstr
->is_utf8
= dfa
->is_utf8
;
175 pstr
->map_notascii
= dfa
->map_notascii
;
176 pstr
->stop
= pstr
->len
;
177 pstr
->raw_stop
= pstr
->stop
;
180 #ifdef RE_ENABLE_I18N
182 /* Build wide character buffer PSTR->WCS.
183 If the byte sequence of the string are:
184 <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
185 Then wide character buffer will be:
186 <wc1> , WEOF , <wc2> , WEOF , <wc3>
187 We use WEOF for padding, they indicate that the position isn't
188 a first byte of a multibyte character.
190 Note that this function assumes PSTR->VALID_LEN elements are already
191 built and starts from PSTR->VALID_LEN. */
195 build_wcs_buffer (re_string_t
*pstr
)
197 #if defined __UCLIBC__
198 unsigned char buf
[MB_LEN_MAX
];
199 assert (MB_LEN_MAX
>= pstr
->mb_cur_max
);
201 unsigned char buf
[64];
204 int byte_idx
, end_idx
, remain_len
;
207 /* Build the buffers from pstr->valid_len to either pstr->len or
209 end_idx
= (pstr
->bufs_len
> pstr
->len
) ? pstr
->len
: pstr
->bufs_len
;
210 for (byte_idx
= pstr
->valid_len
; byte_idx
< end_idx
;)
215 remain_len
= end_idx
- byte_idx
;
216 prev_st
= pstr
->cur_state
;
217 /* Apply the translation if we need. */
218 if (BE (pstr
->trans
!= NULL
, 0))
222 for (i
= 0; i
< pstr
->mb_cur_max
&& i
< remain_len
; ++i
)
224 ch
= pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ byte_idx
+ i
];
225 buf
[i
] = pstr
->mbs
[byte_idx
+ i
] = pstr
->trans
[ch
];
227 p
= (const char *) buf
;
230 p
= (const char *) pstr
->raw_mbs
+ pstr
->raw_mbs_idx
+ byte_idx
;
231 mbclen
= mbrtowc (&wc
, p
, remain_len
, &pstr
->cur_state
);
232 if (BE (mbclen
== (size_t) -2, 0))
234 /* The buffer doesn't have enough space, finish to build. */
235 pstr
->cur_state
= prev_st
;
238 else if (BE (mbclen
== (size_t) -1 || mbclen
== 0, 0))
240 /* We treat these cases as a singlebyte character. */
242 wc
= (wchar_t) pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ byte_idx
];
243 if (BE (pstr
->trans
!= NULL
, 0))
244 wc
= pstr
->trans
[wc
];
245 pstr
->cur_state
= prev_st
;
248 /* Write wide character and padding. */
249 pstr
->wcs
[byte_idx
++] = wc
;
250 /* Write paddings. */
251 for (remain_len
= byte_idx
+ mbclen
- 1; byte_idx
< remain_len
;)
252 pstr
->wcs
[byte_idx
++] = WEOF
;
254 pstr
->valid_len
= byte_idx
;
255 pstr
->valid_raw_len
= byte_idx
;
258 /* Build wide character buffer PSTR->WCS like build_wcs_buffer,
259 but for REG_ICASE. */
263 build_wcs_upper_buffer (re_string_t
*pstr
)
266 int src_idx
, byte_idx
, end_idx
, remain_len
;
268 #if defined __UCLIBC__
269 char buf
[MB_LEN_MAX
];
270 assert (MB_LEN_MAX
>= pstr
->mb_cur_max
);
275 byte_idx
= pstr
->valid_len
;
276 end_idx
= (pstr
->bufs_len
> pstr
->len
) ? pstr
->len
: pstr
->bufs_len
;
278 /* The following optimization assumes that ASCII characters can be
279 mapped to wide characters with a simple cast. */
280 if (! pstr
->map_notascii
&& pstr
->trans
== NULL
&& !pstr
->offsets_needed
)
282 while (byte_idx
< end_idx
)
286 if (isascii (pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ byte_idx
])
287 && mbsinit (&pstr
->cur_state
))
289 /* In case of a singlebyte character. */
291 = toupper (pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ byte_idx
]);
292 /* The next step uses the assumption that wchar_t is encoded
293 ASCII-safe: all ASCII values can be converted like this. */
294 pstr
->wcs
[byte_idx
] = (wchar_t) pstr
->mbs
[byte_idx
];
299 remain_len
= end_idx
- byte_idx
;
300 prev_st
= pstr
->cur_state
;
301 mbclen
= mbrtowc (&wc
,
302 ((const char *) pstr
->raw_mbs
+ pstr
->raw_mbs_idx
303 + byte_idx
), remain_len
, &pstr
->cur_state
);
304 if (BE (mbclen
+ 2 > 2, 1))
312 mbcdlen
= wcrtomb (buf
, wcu
, &prev_st
);
313 if (BE (mbclen
== mbcdlen
, 1))
314 memcpy (pstr
->mbs
+ byte_idx
, buf
, mbclen
);
322 memcpy (pstr
->mbs
+ byte_idx
,
323 pstr
->raw_mbs
+ pstr
->raw_mbs_idx
+ byte_idx
, mbclen
);
324 pstr
->wcs
[byte_idx
++] = wcu
;
325 /* Write paddings. */
326 for (remain_len
= byte_idx
+ mbclen
- 1; byte_idx
< remain_len
;)
327 pstr
->wcs
[byte_idx
++] = WEOF
;
329 else if (mbclen
== (size_t) -1 || mbclen
== 0)
331 /* It is an invalid character or '\0'. Just use the byte. */
332 int ch
= pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ byte_idx
];
333 pstr
->mbs
[byte_idx
] = ch
;
334 /* And also cast it to wide char. */
335 pstr
->wcs
[byte_idx
++] = (wchar_t) ch
;
336 if (BE (mbclen
== (size_t) -1, 0))
337 pstr
->cur_state
= prev_st
;
341 /* The buffer doesn't have enough space, finish to build. */
342 pstr
->cur_state
= prev_st
;
346 pstr
->valid_len
= byte_idx
;
347 pstr
->valid_raw_len
= byte_idx
;
351 for (src_idx
= pstr
->valid_raw_len
; byte_idx
< end_idx
;)
356 remain_len
= end_idx
- byte_idx
;
357 prev_st
= pstr
->cur_state
;
358 if (BE (pstr
->trans
!= NULL
, 0))
362 for (i
= 0; i
< pstr
->mb_cur_max
&& i
< remain_len
; ++i
)
364 ch
= pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ src_idx
+ i
];
365 buf
[i
] = pstr
->trans
[ch
];
367 p
= (const char *) buf
;
370 p
= (const char *) pstr
->raw_mbs
+ pstr
->raw_mbs_idx
+ src_idx
;
371 mbclen
= mbrtowc (&wc
, p
, remain_len
, &pstr
->cur_state
);
372 if (BE (mbclen
+ 2 > 2, 1))
380 mbcdlen
= wcrtomb ((char *) buf
, wcu
, &prev_st
);
381 if (BE (mbclen
== mbcdlen
, 1))
382 memcpy (pstr
->mbs
+ byte_idx
, buf
, mbclen
);
383 else if (mbcdlen
!= (size_t) -1)
387 if (byte_idx
+ mbcdlen
> pstr
->bufs_len
)
389 pstr
->cur_state
= prev_st
;
393 if (pstr
->offsets
== NULL
)
395 pstr
->offsets
= re_malloc (int, pstr
->bufs_len
);
397 if (pstr
->offsets
== NULL
)
400 if (!pstr
->offsets_needed
)
402 for (i
= 0; i
< (size_t) byte_idx
; ++i
)
403 pstr
->offsets
[i
] = i
;
404 pstr
->offsets_needed
= 1;
407 memcpy (pstr
->mbs
+ byte_idx
, buf
, mbcdlen
);
408 pstr
->wcs
[byte_idx
] = wcu
;
409 pstr
->offsets
[byte_idx
] = src_idx
;
410 for (i
= 1; i
< mbcdlen
; ++i
)
412 pstr
->offsets
[byte_idx
+ i
]
413 = src_idx
+ (i
< mbclen
? i
: mbclen
- 1);
414 pstr
->wcs
[byte_idx
+ i
] = WEOF
;
416 pstr
->len
+= mbcdlen
- mbclen
;
417 if (pstr
->raw_stop
> src_idx
)
418 pstr
->stop
+= mbcdlen
- mbclen
;
419 end_idx
= (pstr
->bufs_len
> pstr
->len
)
420 ? pstr
->len
: pstr
->bufs_len
;
426 memcpy (pstr
->mbs
+ byte_idx
, p
, mbclen
);
429 memcpy (pstr
->mbs
+ byte_idx
, p
, mbclen
);
431 if (BE (pstr
->offsets_needed
!= 0, 0))
434 for (i
= 0; i
< mbclen
; ++i
)
435 pstr
->offsets
[byte_idx
+ i
] = src_idx
+ i
;
439 pstr
->wcs
[byte_idx
++] = wcu
;
440 /* Write paddings. */
441 for (remain_len
= byte_idx
+ mbclen
- 1; byte_idx
< remain_len
;)
442 pstr
->wcs
[byte_idx
++] = WEOF
;
444 else if (mbclen
== (size_t) -1 || mbclen
== 0)
446 /* It is an invalid character or '\0'. Just use the byte. */
447 int ch
= pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ src_idx
];
449 if (BE (pstr
->trans
!= NULL
, 0))
450 ch
= pstr
->trans
[ch
];
451 pstr
->mbs
[byte_idx
] = ch
;
453 if (BE (pstr
->offsets_needed
!= 0, 0))
454 pstr
->offsets
[byte_idx
] = src_idx
;
457 /* And also cast it to wide char. */
458 pstr
->wcs
[byte_idx
++] = (wchar_t) ch
;
459 if (BE (mbclen
== (size_t) -1, 0))
460 pstr
->cur_state
= prev_st
;
464 /* The buffer doesn't have enough space, finish to build. */
465 pstr
->cur_state
= prev_st
;
469 pstr
->valid_len
= byte_idx
;
470 pstr
->valid_raw_len
= src_idx
;
474 /* Skip characters until the index becomes greater than NEW_RAW_IDX.
479 re_string_skip_chars (re_string_t
*pstr
, int new_raw_idx
, wint_t *last_wc
)
486 /* Skip the characters which are not necessary to check. */
487 for (rawbuf_idx
= pstr
->raw_mbs_idx
+ pstr
->valid_raw_len
;
488 rawbuf_idx
< new_raw_idx
;)
491 remain_len
= pstr
->len
- rawbuf_idx
;
492 prev_st
= pstr
->cur_state
;
493 mbclen
= mbrtowc (&wc
, (const char *) pstr
->raw_mbs
+ rawbuf_idx
,
494 remain_len
, &pstr
->cur_state
);
495 if (BE (mbclen
== (size_t) -2 || mbclen
== (size_t) -1 || mbclen
== 0, 0))
497 /* We treat these cases as a singlebyte character. */
499 pstr
->cur_state
= prev_st
;
501 /* Then proceed the next character. */
502 rawbuf_idx
+= mbclen
;
504 *last_wc
= (wint_t) wc
;
507 #endif /* RE_ENABLE_I18N */
509 /* Build the buffer PSTR->MBS, and apply the translation if we need.
510 This function is used in case of REG_ICASE. */
514 build_upper_buffer (re_string_t
*pstr
)
516 int char_idx
, end_idx
;
517 end_idx
= (pstr
->bufs_len
> pstr
->len
) ? pstr
->len
: pstr
->bufs_len
;
519 for (char_idx
= pstr
->valid_len
; char_idx
< end_idx
; ++char_idx
)
521 int ch
= pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ char_idx
];
522 if (BE (pstr
->trans
!= NULL
, 0))
523 ch
= pstr
->trans
[ch
];
525 pstr
->mbs
[char_idx
] = toupper (ch
);
527 pstr
->mbs
[char_idx
] = ch
;
529 pstr
->valid_len
= char_idx
;
530 pstr
->valid_raw_len
= char_idx
;
533 /* Apply TRANS to the buffer in PSTR. */
537 re_string_translate_buffer (re_string_t
*pstr
)
539 int buf_idx
, end_idx
;
540 end_idx
= (pstr
->bufs_len
> pstr
->len
) ? pstr
->len
: pstr
->bufs_len
;
542 for (buf_idx
= pstr
->valid_len
; buf_idx
< end_idx
; ++buf_idx
)
544 int ch
= pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ buf_idx
];
545 pstr
->mbs
[buf_idx
] = pstr
->trans
[ch
];
548 pstr
->valid_len
= buf_idx
;
549 pstr
->valid_raw_len
= buf_idx
;
552 /* This function re-construct the buffers.
553 Concretely, convert to wide character in case of pstr->mb_cur_max > 1,
554 convert to upper case in case of REG_ICASE, apply translation. */
558 re_string_reconstruct (re_string_t
*pstr
, int idx
, int eflags
)
560 int offset
= idx
- pstr
->raw_mbs_idx
;
561 if (BE (offset
< 0, 0))
564 #ifdef RE_ENABLE_I18N
565 if (pstr
->mb_cur_max
> 1)
566 memset (&pstr
->cur_state
, '\0', sizeof (mbstate_t));
568 pstr
->len
= pstr
->raw_len
;
569 pstr
->stop
= pstr
->raw_stop
;
571 pstr
->raw_mbs_idx
= 0;
572 pstr
->valid_raw_len
= 0;
573 pstr
->offsets_needed
= 0;
574 pstr
->tip_context
= ((eflags
& REG_NOTBOL
) ? CONTEXT_BEGBUF
575 : CONTEXT_NEWLINE
| CONTEXT_BEGBUF
);
576 if (!pstr
->mbs_allocated
)
577 pstr
->mbs
= (unsigned char *) pstr
->raw_mbs
;
581 if (BE (offset
!= 0, 1))
583 /* Are the characters which are already checked remain? */
584 if (BE (offset
< pstr
->valid_raw_len
, 1)
585 #ifdef RE_ENABLE_I18N
586 /* Handling this would enlarge the code too much.
587 Accept a slowdown in that case. */
588 && pstr
->offsets_needed
== 0
592 /* Yes, move them to the front of the buffer. */
593 pstr
->tip_context
= re_string_context_at (pstr
, offset
- 1, eflags
);
594 #ifdef RE_ENABLE_I18N
595 if (pstr
->mb_cur_max
> 1)
596 memmove (pstr
->wcs
, pstr
->wcs
+ offset
,
597 (pstr
->valid_len
- offset
) * sizeof (wint_t));
599 if (BE (pstr
->mbs_allocated
, 0))
600 memmove (pstr
->mbs
, pstr
->mbs
+ offset
,
601 pstr
->valid_len
- offset
);
602 pstr
->valid_len
-= offset
;
603 pstr
->valid_raw_len
-= offset
;
605 assert (pstr
->valid_len
> 0);
610 /* No, skip all characters until IDX. */
611 #ifdef RE_ENABLE_I18N
612 if (BE (pstr
->offsets_needed
, 0))
614 pstr
->len
= pstr
->raw_len
- idx
+ offset
;
615 pstr
->stop
= pstr
->raw_stop
- idx
+ offset
;
616 pstr
->offsets_needed
= 0;
620 pstr
->valid_raw_len
= 0;
621 #ifdef RE_ENABLE_I18N
622 if (pstr
->mb_cur_max
> 1)
629 const unsigned char *raw
, *p
, *end
;
631 /* Special case UTF-8. Multi-byte chars start with any
632 byte other than 0x80 - 0xbf. */
633 raw
= pstr
->raw_mbs
+ pstr
->raw_mbs_idx
;
634 end
= raw
+ (offset
- pstr
->mb_cur_max
);
635 p
= raw
+ offset
- 1;
637 /* We know the wchar_t encoding is UCS4, so for the simple
638 case, ASCII characters, skip the conversion step. */
639 if (isascii (*p
) && BE (pstr
->trans
== NULL
, 1))
641 memset (&pstr
->cur_state
, '\0', sizeof (mbstate_t));
647 for (; p
>= end
; --p
)
648 if ((*p
& 0xc0) != 0x80)
652 int mlen
= raw
+ pstr
->len
- p
;
655 /* XXX Don't use mbrtowc, we know which conversion
656 to use (UTF-8 -> UCS4). */
657 memset (&cur_state
, 0, sizeof (cur_state
));
658 mbclen
= mbrtowc (&wc2
, (const char *) p
, mlen
,
660 if (raw
+ offset
- p
<= mbclen
661 && mbclen
< (size_t) -2)
663 memset (&pstr
->cur_state
, '\0',
665 pstr
->valid_len
= mbclen
- (raw
+ offset
- p
);
673 pstr
->valid_len
= re_string_skip_chars (pstr
, idx
, &wc
) - idx
;
674 if (BE (pstr
->valid_len
, 0))
676 for (wcs_idx
= 0; wcs_idx
< pstr
->valid_len
; ++wcs_idx
)
677 pstr
->wcs
[wcs_idx
] = WEOF
;
678 if (pstr
->mbs_allocated
)
679 memset (pstr
->mbs
, 255, pstr
->valid_len
);
681 pstr
->valid_raw_len
= pstr
->valid_len
;
682 pstr
->tip_context
= ((BE (pstr
->word_ops_used
!= 0, 0)
683 && IS_WIDE_WORD_CHAR (wc
))
685 : ((IS_WIDE_NEWLINE (wc
)
686 && pstr
->newline_anchor
)
687 ? CONTEXT_NEWLINE
: 0));
690 #endif /* RE_ENABLE_I18N */
692 int c
= pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ offset
- 1];
695 pstr
->tip_context
= (bitset_contain (pstr
->word_char
, c
)
697 : ((IS_NEWLINE (c
) && pstr
->newline_anchor
)
698 ? CONTEXT_NEWLINE
: 0));
701 if (!BE (pstr
->mbs_allocated
, 0))
704 pstr
->raw_mbs_idx
= idx
;
706 pstr
->stop
-= offset
;
708 /* Then build the buffers. */
709 #ifdef RE_ENABLE_I18N
710 if (pstr
->mb_cur_max
> 1)
714 reg_errcode_t ret
= build_wcs_upper_buffer (pstr
);
715 if (BE (ret
!= REG_NOERROR
, 0))
719 build_wcs_buffer (pstr
);
723 if (BE (pstr
->mbs_allocated
, 0))
726 build_upper_buffer (pstr
);
727 else if (pstr
->trans
!= NULL
)
728 re_string_translate_buffer (pstr
);
731 pstr
->valid_len
= pstr
->len
;
738 internal_function
__attribute ((pure
))
739 re_string_peek_byte_case (const re_string_t
*pstr
, int idx
)
743 /* Handle the common (easiest) cases first. */
744 if (BE (!pstr
->mbs_allocated
, 1))
745 return re_string_peek_byte (pstr
, idx
);
747 #ifdef RE_ENABLE_I18N
748 if (pstr
->mb_cur_max
> 1
749 && ! re_string_is_single_byte_char (pstr
, pstr
->cur_idx
+ idx
))
750 return re_string_peek_byte (pstr
, idx
);
753 off
= pstr
->cur_idx
+ idx
;
754 #ifdef RE_ENABLE_I18N
755 if (pstr
->offsets_needed
)
756 off
= pstr
->offsets
[off
];
759 ch
= pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ off
];
761 #ifdef RE_ENABLE_I18N
762 /* Ensure that e.g. for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I
763 this function returns CAPITAL LETTER I instead of first byte of
764 DOTLESS SMALL LETTER I. The latter would confuse the parser,
765 since peek_byte_case doesn't advance cur_idx in any way. */
766 if (pstr
->offsets_needed
&& !isascii (ch
))
767 return re_string_peek_byte (pstr
, idx
);
774 internal_function
__attribute ((pure
))
775 re_string_fetch_byte_case (re_string_t
*pstr
)
777 if (BE (!pstr
->mbs_allocated
, 1))
778 return re_string_fetch_byte (pstr
);
780 #ifdef RE_ENABLE_I18N
781 if (pstr
->offsets_needed
)
785 /* For tr_TR.UTF-8 [[:islower:]] there is
786 [[: CAPITAL LETTER I WITH DOT lower:]] in mbs. Skip
787 in that case the whole multi-byte character and return
788 the original letter. On the other side, with
789 [[: DOTLESS SMALL LETTER I return [[:I, as doing
790 anything else would complicate things too much. */
792 if (!re_string_first_byte (pstr
, pstr
->cur_idx
))
793 return re_string_fetch_byte (pstr
);
795 off
= pstr
->offsets
[pstr
->cur_idx
];
796 ch
= pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ off
];
799 return re_string_fetch_byte (pstr
);
801 re_string_skip_bytes (pstr
,
802 re_string_char_size_at (pstr
, pstr
->cur_idx
));
807 return pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ pstr
->cur_idx
++];
812 re_string_destruct (re_string_t
*pstr
)
814 #ifdef RE_ENABLE_I18N
816 re_free (pstr
->offsets
);
817 #endif /* RE_ENABLE_I18N */
818 if (pstr
->mbs_allocated
)
822 /* Return the context at IDX in INPUT. */
826 re_string_context_at (const re_string_t
*input
, int idx
, int eflags
)
830 /* In this case, we use the value stored in input->tip_context,
831 since we can't know the character in input->mbs[-1] here. */
832 return input
->tip_context
;
833 if (BE (idx
== input
->len
, 0))
834 return ((eflags
& REG_NOTEOL
) ? CONTEXT_ENDBUF
835 : CONTEXT_NEWLINE
| CONTEXT_ENDBUF
);
836 #ifdef RE_ENABLE_I18N
837 if (input
->mb_cur_max
> 1)
841 while(input
->wcs
[wc_idx
] == WEOF
)
844 /* It must not happen. */
845 assert (wc_idx
>= 0);
849 return input
->tip_context
;
851 wc
= input
->wcs
[wc_idx
];
852 if (BE (input
->word_ops_used
!= 0, 0) && IS_WIDE_WORD_CHAR (wc
))
854 return (IS_WIDE_NEWLINE (wc
) && input
->newline_anchor
855 ? CONTEXT_NEWLINE
: 0);
858 c
= re_string_byte_at (input
, idx
);
859 if (bitset_contain (input
->word_char
, c
))
861 return IS_NEWLINE (c
) && input
->newline_anchor
? CONTEXT_NEWLINE
: 0;
864 /* Functions for set operation. */
868 re_node_set_alloc (re_node_set
*set
, int size
)
872 set
->elems
= re_malloc (int, size
); /* can be NULL if size == 0
873 (see re_node_set_init_empty(set)) */
874 if (BE (set
->elems
== NULL
&& size
!= 0, 0))
881 re_node_set_init_1 (re_node_set
*set
, int elem
)
885 set
->elems
= re_malloc (int, 1);
886 if (BE (set
->elems
== NULL
, 0))
888 set
->alloc
= set
->nelem
= 0;
891 set
->elems
[0] = elem
;
897 re_node_set_init_2 (re_node_set
*set
, int elem1
, int elem2
)
900 set
->elems
= re_malloc (int, 2);
901 if (BE (set
->elems
== NULL
, 0))
906 set
->elems
[0] = elem1
;
913 set
->elems
[0] = elem1
;
914 set
->elems
[1] = elem2
;
918 set
->elems
[0] = elem2
;
919 set
->elems
[1] = elem1
;
927 re_node_set_init_copy (re_node_set
*dest
, const re_node_set
*src
)
929 dest
->nelem
= src
->nelem
;
932 dest
->alloc
= dest
->nelem
;
933 dest
->elems
= re_malloc (int, dest
->alloc
);
934 if (BE (dest
->elems
== NULL
, 0))
936 dest
->alloc
= dest
->nelem
= 0;
939 memcpy (dest
->elems
, src
->elems
, src
->nelem
* sizeof (int));
942 re_node_set_init_empty (dest
);
946 /* Calculate the intersection of the sets SRC1 and SRC2. And merge it to
947 DEST. Return value indicate the error code or REG_NOERROR if succeeded.
948 Note: We assume dest->elems is NULL, when dest->alloc is 0. */
952 re_node_set_add_intersect (re_node_set
*dest
, const re_node_set
*src1
,
953 const re_node_set
*src2
)
955 int i1
, i2
, is
, id
, delta
, sbase
;
956 if (src1
->nelem
== 0 || src2
->nelem
== 0)
959 /* We need dest->nelem + 2 * elems_in_intersection; this is a
960 conservative estimate. */
961 if (src1
->nelem
+ src2
->nelem
+ dest
->nelem
> dest
->alloc
)
963 int new_alloc
= src1
->nelem
+ src2
->nelem
+ dest
->alloc
;
964 int *new_elems
= re_realloc (dest
->elems
, int, new_alloc
);
965 if (BE (new_elems
== NULL
, 0))
967 dest
->elems
= new_elems
;
968 dest
->alloc
= new_alloc
;
971 /* Find the items in the intersection of SRC1 and SRC2, and copy
972 into the top of DEST those that are not already in DEST itself. */
973 sbase
= dest
->nelem
+ src1
->nelem
+ src2
->nelem
;
974 i1
= src1
->nelem
- 1;
975 i2
= src2
->nelem
- 1;
976 id
= dest
->nelem
- 1;
979 if (src1
->elems
[i1
] == src2
->elems
[i2
])
981 /* Try to find the item in DEST. Maybe we could binary search? */
982 while (id
>= 0 && dest
->elems
[id
] > src1
->elems
[i1
])
985 if (id
< 0 || dest
->elems
[id
] != src1
->elems
[i1
])
986 dest
->elems
[--sbase
] = src1
->elems
[i1
];
988 if (--i1
< 0 || --i2
< 0)
992 /* Lower the highest of the two items. */
993 else if (src1
->elems
[i1
] < src2
->elems
[i2
])
1005 id
= dest
->nelem
- 1;
1006 is
= dest
->nelem
+ src1
->nelem
+ src2
->nelem
- 1;
1007 delta
= is
- sbase
+ 1;
1009 /* Now copy. When DELTA becomes zero, the remaining
1010 DEST elements are already in place; this is more or
1011 less the same loop that is in re_node_set_merge. */
1012 dest
->nelem
+= delta
;
1013 if (delta
> 0 && id
>= 0)
1016 if (dest
->elems
[is
] > dest
->elems
[id
])
1018 /* Copy from the top. */
1019 dest
->elems
[id
+ delta
--] = dest
->elems
[is
--];
1025 /* Slide from the bottom. */
1026 dest
->elems
[id
+ delta
] = dest
->elems
[id
];
1032 /* Copy remaining SRC elements. */
1033 memcpy (dest
->elems
, dest
->elems
+ sbase
, delta
* sizeof (int));
1038 /* Calculate the union set of the sets SRC1 and SRC2. And store it to
1039 DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
1041 static reg_errcode_t
1043 re_node_set_init_union (re_node_set
*dest
, const re_node_set
*src1
,
1044 const re_node_set
*src2
)
1047 if (src1
!= NULL
&& src1
->nelem
> 0 && src2
!= NULL
&& src2
->nelem
> 0)
1049 dest
->alloc
= src1
->nelem
+ src2
->nelem
;
1050 dest
->elems
= re_malloc (int, dest
->alloc
);
1051 if (BE (dest
->elems
== NULL
, 0))
1056 if (src1
!= NULL
&& src1
->nelem
> 0)
1057 return re_node_set_init_copy (dest
, src1
);
1058 if (src2
!= NULL
&& src2
->nelem
> 0)
1059 return re_node_set_init_copy (dest
, src2
);
1060 re_node_set_init_empty (dest
);
1063 for (i1
= i2
= id
= 0 ; i1
< src1
->nelem
&& i2
< src2
->nelem
;)
1065 if (src1
->elems
[i1
] > src2
->elems
[i2
])
1067 dest
->elems
[id
++] = src2
->elems
[i2
++];
1070 if (src1
->elems
[i1
] == src2
->elems
[i2
])
1072 dest
->elems
[id
++] = src1
->elems
[i1
++];
1074 if (i1
< src1
->nelem
)
1076 memcpy (dest
->elems
+ id
, src1
->elems
+ i1
,
1077 (src1
->nelem
- i1
) * sizeof (int));
1078 id
+= src1
->nelem
- i1
;
1080 else if (i2
< src2
->nelem
)
1082 memcpy (dest
->elems
+ id
, src2
->elems
+ i2
,
1083 (src2
->nelem
- i2
) * sizeof (int));
1084 id
+= src2
->nelem
- i2
;
1090 /* Calculate the union set of the sets DEST and SRC. And store it to
1091 DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
1093 static reg_errcode_t
1095 re_node_set_merge (re_node_set
*dest
, const re_node_set
*src
)
1097 int is
, id
, sbase
, delta
;
1098 if (src
== NULL
|| src
->nelem
== 0)
1100 if (dest
->alloc
< 2 * src
->nelem
+ dest
->nelem
)
1102 int new_alloc
= 2 * (src
->nelem
+ dest
->alloc
);
1103 int *new_buffer
= re_realloc (dest
->elems
, int, new_alloc
);
1104 if (BE (new_buffer
== NULL
, 0))
1106 dest
->elems
= new_buffer
;
1107 dest
->alloc
= new_alloc
;
1110 if (BE (dest
->nelem
== 0, 0))
1112 dest
->nelem
= src
->nelem
;
1113 memcpy (dest
->elems
, src
->elems
, src
->nelem
* sizeof (int));
1117 /* Copy into the top of DEST the items of SRC that are not
1118 found in DEST. Maybe we could binary search in DEST? */
1119 for (sbase
= dest
->nelem
+ 2 * src
->nelem
,
1120 is
= src
->nelem
- 1, id
= dest
->nelem
- 1; is
>= 0 && id
>= 0; )
1122 if (dest
->elems
[id
] == src
->elems
[is
])
1124 else if (dest
->elems
[id
] < src
->elems
[is
])
1125 dest
->elems
[--sbase
] = src
->elems
[is
--];
1126 else /* if (dest->elems[id] > src->elems[is]) */
1132 /* If DEST is exhausted, the remaining items of SRC must be unique. */
1134 memcpy (dest
->elems
+ sbase
, src
->elems
, (is
+ 1) * sizeof (int));
1137 id
= dest
->nelem
- 1;
1138 is
= dest
->nelem
+ 2 * src
->nelem
- 1;
1139 delta
= is
- sbase
+ 1;
1143 /* Now copy. When DELTA becomes zero, the remaining
1144 DEST elements are already in place. */
1145 dest
->nelem
+= delta
;
1148 if (dest
->elems
[is
] > dest
->elems
[id
])
1150 /* Copy from the top. */
1151 dest
->elems
[id
+ delta
--] = dest
->elems
[is
--];
1157 /* Slide from the bottom. */
1158 dest
->elems
[id
+ delta
] = dest
->elems
[id
];
1161 /* Copy remaining SRC elements. */
1162 memcpy (dest
->elems
, dest
->elems
+ sbase
,
1163 delta
* sizeof (int));
1172 /* Insert the new element ELEM to the re_node_set* SET.
1173 SET should not already have ELEM.
1174 return -1 if an error is occured, return 1 otherwise. */
1178 re_node_set_insert (re_node_set
*set
, int elem
)
1181 /* In case the set is empty. */
1182 if (set
->alloc
== 0)
1184 if (BE (re_node_set_init_1 (set
, elem
) == REG_NOERROR
, 1))
1189 if (BE (set
->nelem
, 0) == 0)
1191 /* We already guaranteed above that set->alloc != 0. */
1192 set
->elems
[0] = elem
;
1197 /* Realloc if we need. */
1198 if (set
->alloc
== set
->nelem
)
1201 set
->alloc
= set
->alloc
* 2;
1202 new_elems
= re_realloc (set
->elems
, int, set
->alloc
);
1203 if (BE (new_elems
== NULL
, 0))
1205 set
->elems
= new_elems
;
1208 /* Move the elements which follows the new element. Test the
1209 first element separately to skip a check in the inner loop. */
1210 if (elem
< set
->elems
[0])
1213 for (idx
= set
->nelem
; idx
> 0; idx
--)
1214 set
->elems
[idx
] = set
->elems
[idx
- 1];
1218 for (idx
= set
->nelem
; set
->elems
[idx
- 1] > elem
; idx
--)
1219 set
->elems
[idx
] = set
->elems
[idx
- 1];
1222 /* Insert the new element. */
1223 set
->elems
[idx
] = elem
;
1228 /* Insert the new element ELEM to the re_node_set* SET.
1229 SET should not already have any element greater than or equal to ELEM.
1230 Return -1 if an error is occured, return 1 otherwise. */
1234 re_node_set_insert_last (re_node_set
*set
, int elem
)
1236 /* Realloc if we need. */
1237 if (set
->alloc
== set
->nelem
)
1240 set
->alloc
= (set
->alloc
+ 1) * 2;
1241 new_elems
= re_realloc (set
->elems
, int, set
->alloc
);
1242 if (BE (new_elems
== NULL
, 0))
1244 set
->elems
= new_elems
;
1247 /* Insert the new element. */
1248 set
->elems
[set
->nelem
++] = elem
;
1252 /* Compare two node sets SET1 and SET2.
1253 return 1 if SET1 and SET2 are equivalent, return 0 otherwise. */
1256 internal_function
__attribute ((pure
))
1257 re_node_set_compare (const re_node_set
*set1
, const re_node_set
*set2
)
1260 if (set1
== NULL
|| set2
== NULL
|| set1
->nelem
!= set2
->nelem
)
1262 for (i
= set1
->nelem
; --i
>= 0 ; )
1263 if (set1
->elems
[i
] != set2
->elems
[i
])
1268 /* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise. */
1271 internal_function
__attribute ((pure
))
1272 re_node_set_contains (const re_node_set
*set
, int elem
)
1274 unsigned int idx
, right
, mid
;
1275 if (set
->nelem
<= 0)
1278 /* Binary search the element. */
1280 right
= set
->nelem
- 1;
1283 mid
= (idx
+ right
) / 2;
1284 if (set
->elems
[mid
] < elem
)
1289 return set
->elems
[idx
] == elem
? idx
+ 1 : 0;
1294 re_node_set_remove_at (re_node_set
*set
, int idx
)
1296 if (idx
< 0 || idx
>= set
->nelem
)
1299 for (; idx
< set
->nelem
; idx
++)
1300 set
->elems
[idx
] = set
->elems
[idx
+ 1];
1304 /* Add the token TOKEN to dfa->nodes, and return the index of the token.
1305 Or return -1, if an error will be occured. */
1309 re_dfa_add_node (re_dfa_t
*dfa
, re_token_t token
)
1311 #ifdef RE_ENABLE_I18N
1312 int type
= token
.type
;
1314 if (BE (dfa
->nodes_len
>= dfa
->nodes_alloc
, 0))
1316 size_t new_nodes_alloc
= dfa
->nodes_alloc
* 2;
1317 int *new_nexts
, *new_indices
;
1318 re_node_set
*new_edests
, *new_eclosures
;
1319 re_token_t
*new_nodes
;
1321 /* Avoid overflows. */
1322 if (BE (new_nodes_alloc
< dfa
->nodes_alloc
, 0))
1325 new_nodes
= re_realloc (dfa
->nodes
, re_token_t
, new_nodes_alloc
);
1326 if (BE (new_nodes
== NULL
, 0))
1328 dfa
->nodes
= new_nodes
;
1329 new_nexts
= re_realloc (dfa
->nexts
, int, new_nodes_alloc
);
1330 new_indices
= re_realloc (dfa
->org_indices
, int, new_nodes_alloc
);
1331 new_edests
= re_realloc (dfa
->edests
, re_node_set
, new_nodes_alloc
);
1332 new_eclosures
= re_realloc (dfa
->eclosures
, re_node_set
, new_nodes_alloc
);
1333 if (BE (new_nexts
== NULL
|| new_indices
== NULL
1334 || new_edests
== NULL
|| new_eclosures
== NULL
, 0))
1336 dfa
->nexts
= new_nexts
;
1337 dfa
->org_indices
= new_indices
;
1338 dfa
->edests
= new_edests
;
1339 dfa
->eclosures
= new_eclosures
;
1340 dfa
->nodes_alloc
= new_nodes_alloc
;
1342 dfa
->nodes
[dfa
->nodes_len
] = token
;
1343 dfa
->nodes
[dfa
->nodes_len
].constraint
= 0;
1344 #ifdef RE_ENABLE_I18N
1345 dfa
->nodes
[dfa
->nodes_len
].accept_mb
=
1346 (type
== OP_PERIOD
&& dfa
->mb_cur_max
> 1) || type
== COMPLEX_BRACKET
;
1348 dfa
->nexts
[dfa
->nodes_len
] = -1;
1349 re_node_set_init_empty (dfa
->edests
+ dfa
->nodes_len
);
1350 re_node_set_init_empty (dfa
->eclosures
+ dfa
->nodes_len
);
1351 return dfa
->nodes_len
++;
1354 static __inline__
unsigned int
1356 calc_state_hash (const re_node_set
*nodes
, unsigned int context
)
1358 unsigned int hash
= nodes
->nelem
+ context
;
1360 for (i
= 0 ; i
< nodes
->nelem
; i
++)
1361 hash
+= nodes
->elems
[i
];
1365 /* Search for the state whose node_set is equivalent to NODES.
1366 Return the pointer to the state, if we found it in the DFA.
1367 Otherwise create the new one and return it. In case of an error
1368 return NULL and set the error code in ERR.
1369 Note: - We assume NULL as the invalid state, then it is possible that
1370 return value is NULL and ERR is REG_NOERROR.
1371 - We never return non-NULL value in case of any errors, it is for
1374 static re_dfastate_t
*
1376 re_acquire_state (reg_errcode_t
*err
, const re_dfa_t
*dfa
,
1377 const re_node_set
*nodes
)
1380 re_dfastate_t
*new_state
;
1381 struct re_state_table_entry
*spot
;
1383 if (BE (nodes
->nelem
== 0, 0))
1388 hash
= calc_state_hash (nodes
, 0);
1389 spot
= dfa
->state_table
+ (hash
& dfa
->state_hash_mask
);
1391 for (i
= 0 ; i
< spot
->num
; i
++)
1393 re_dfastate_t
*state
= spot
->array
[i
];
1394 if (hash
!= state
->hash
)
1396 if (re_node_set_compare (&state
->nodes
, nodes
))
1400 /* There are no appropriate state in the dfa, create the new one. */
1401 new_state
= create_ci_newstate (dfa
, nodes
, hash
);
1402 if (BE (new_state
== NULL
, 0))
1408 /* Search for the state whose node_set is equivalent to NODES and
1409 whose context is equivalent to CONTEXT.
1410 Return the pointer to the state, if we found it in the DFA.
1411 Otherwise create the new one and return it. In case of an error
1412 return NULL and set the error code in ERR.
1413 Note: - We assume NULL as the invalid state, then it is possible that
1414 return value is NULL and ERR is REG_NOERROR.
1415 - We never return non-NULL value in case of any errors, it is for
1418 static re_dfastate_t
*
1420 re_acquire_state_context (reg_errcode_t
*err
, const re_dfa_t
*dfa
,
1421 const re_node_set
*nodes
, unsigned int context
)
1424 re_dfastate_t
*new_state
;
1425 struct re_state_table_entry
*spot
;
1427 if (nodes
->nelem
== 0)
1432 hash
= calc_state_hash (nodes
, context
);
1433 spot
= dfa
->state_table
+ (hash
& dfa
->state_hash_mask
);
1435 for (i
= 0 ; i
< spot
->num
; i
++)
1437 re_dfastate_t
*state
= spot
->array
[i
];
1438 if (state
->hash
== hash
1439 && state
->context
== context
1440 && re_node_set_compare (state
->entrance_nodes
, nodes
))
1443 /* There are no appropriate state in `dfa', create the new one. */
1444 new_state
= create_cd_newstate (dfa
, nodes
, context
, hash
);
1445 if (BE (new_state
== NULL
, 0))
1451 /* Finish initialization of the new state NEWSTATE, and using its hash value
1452 HASH put in the appropriate bucket of DFA's state table. Return value
1453 indicates the error code if failed. */
1455 static reg_errcode_t
1456 register_state (const re_dfa_t
*dfa
, re_dfastate_t
*newstate
,
1459 struct re_state_table_entry
*spot
;
1463 newstate
->hash
= hash
;
1464 err
= re_node_set_alloc (&newstate
->non_eps_nodes
, newstate
->nodes
.nelem
);
1465 if (BE (err
!= REG_NOERROR
, 0))
1467 for (i
= 0; i
< newstate
->nodes
.nelem
; i
++)
1469 int elem
= newstate
->nodes
.elems
[i
];
1470 if (!IS_EPSILON_NODE (dfa
->nodes
[elem
].type
))
1471 re_node_set_insert_last (&newstate
->non_eps_nodes
, elem
);
1474 spot
= dfa
->state_table
+ (hash
& dfa
->state_hash_mask
);
1475 if (BE (spot
->alloc
<= spot
->num
, 0))
1477 int new_alloc
= 2 * spot
->num
+ 2;
1478 re_dfastate_t
**new_array
= re_realloc (spot
->array
, re_dfastate_t
*,
1480 if (BE (new_array
== NULL
, 0))
1482 spot
->array
= new_array
;
1483 spot
->alloc
= new_alloc
;
1485 spot
->array
[spot
->num
++] = newstate
;
1490 free_state (re_dfastate_t
*state
)
1492 re_node_set_free (&state
->non_eps_nodes
);
1493 re_node_set_free (&state
->inveclosure
);
1494 if (state
->entrance_nodes
!= &state
->nodes
)
1496 re_node_set_free (state
->entrance_nodes
);
1497 re_free (state
->entrance_nodes
);
1499 re_node_set_free (&state
->nodes
);
1500 re_free (state
->word_trtable
);
1501 re_free (state
->trtable
);
1505 /* Create the new state which is independ of contexts.
1506 Return the new state if succeeded, otherwise return NULL. */
1508 static re_dfastate_t
*
1510 create_ci_newstate (const re_dfa_t
*dfa
, const re_node_set
*nodes
,
1515 re_dfastate_t
*newstate
;
1517 newstate
= calloc (sizeof (re_dfastate_t
), 1);
1518 if (BE (newstate
== NULL
, 0))
1520 err
= re_node_set_init_copy (&newstate
->nodes
, nodes
);
1521 if (BE (err
!= REG_NOERROR
, 0))
1527 newstate
->entrance_nodes
= &newstate
->nodes
;
1528 for (i
= 0 ; i
< nodes
->nelem
; i
++)
1530 re_token_t
*node
= dfa
->nodes
+ nodes
->elems
[i
];
1531 re_token_type_t type
= node
->type
;
1533 if (type
== CHARACTER
&& !node
->constraint
)
1535 #ifdef RE_ENABLE_I18N
1536 newstate
->accept_mb
|= node
->accept_mb
;
1539 /* If the state has the halt node, the state is a halt state. */
1540 if (type
== END_OF_RE
)
1542 else if (type
== OP_BACK_REF
)
1543 newstate
->has_backref
= 1;
1544 else if (type
== ANCHOR
|| node
->constraint
)
1545 newstate
->has_constraint
= 1;
1547 err
= register_state (dfa
, newstate
, hash
);
1548 if (BE (err
!= REG_NOERROR
, 0))
1550 free_state (newstate
);
1556 /* Create the new state which is depend on the context CONTEXT.
1557 Return the new state if succeeded, otherwise return NULL. */
1559 static re_dfastate_t
*
1561 create_cd_newstate (const re_dfa_t
*dfa
, const re_node_set
*nodes
,
1562 unsigned int context
, unsigned int hash
)
1564 int i
, nctx_nodes
= 0;
1566 re_dfastate_t
*newstate
;
1568 newstate
= calloc (sizeof (re_dfastate_t
), 1);
1569 if (BE (newstate
== NULL
, 0))
1571 err
= re_node_set_init_copy (&newstate
->nodes
, nodes
);
1572 if (BE (err
!= REG_NOERROR
, 0))
1578 newstate
->context
= context
;
1579 newstate
->entrance_nodes
= &newstate
->nodes
;
1581 for (i
= 0 ; i
< nodes
->nelem
; i
++)
1583 unsigned int constraint
= 0;
1584 re_token_t
*node
= dfa
->nodes
+ nodes
->elems
[i
];
1585 re_token_type_t type
= node
->type
;
1586 if (node
->constraint
)
1587 constraint
= node
->constraint
;
1589 if (type
== CHARACTER
&& !constraint
)
1591 #ifdef RE_ENABLE_I18N
1592 newstate
->accept_mb
|= node
->accept_mb
;
1593 #endif /* RE_ENABLE_I18N */
1595 /* If the state has the halt node, the state is a halt state. */
1596 if (type
== END_OF_RE
)
1598 else if (type
== OP_BACK_REF
)
1599 newstate
->has_backref
= 1;
1600 else if (type
== ANCHOR
)
1601 constraint
= node
->opr
.ctx_type
;
1605 if (newstate
->entrance_nodes
== &newstate
->nodes
)
1607 newstate
->entrance_nodes
= re_malloc (re_node_set
, 1);
1608 if (BE (newstate
->entrance_nodes
== NULL
, 0))
1610 free_state (newstate
);
1613 re_node_set_init_copy (newstate
->entrance_nodes
, nodes
);
1615 newstate
->has_constraint
= 1;
1618 if (NOT_SATISFY_PREV_CONSTRAINT (constraint
,context
))
1620 re_node_set_remove_at (&newstate
->nodes
, i
- nctx_nodes
);
1625 err
= register_state (dfa
, newstate
, hash
);
1626 if (BE (err
!= REG_NOERROR
, 0))
1628 free_state (newstate
);