1 /* Extended regular expression matching and search library.
2 Copyright (C) 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
21 static void re_string_construct_common (const char *str
, int len
,
23 RE_TRANSLATE_TYPE trans
, int icase
,
24 const re_dfa_t
*dfa
) internal_function
;
25 static re_dfastate_t
*create_ci_newstate (const re_dfa_t
*dfa
,
26 const re_node_set
*nodes
,
27 unsigned int hash
) internal_function
;
28 static re_dfastate_t
*create_cd_newstate (const re_dfa_t
*dfa
,
29 const re_node_set
*nodes
,
31 unsigned int hash
) internal_function
;
33 /* Functions for string operation. */
35 /* This function allocate the buffers. It is necessary to call
36 re_string_reconstruct before using the object. */
40 re_string_allocate (re_string_t
*pstr
, const char *str
, int len
, int init_len
,
41 RE_TRANSLATE_TYPE trans
, int icase
, const re_dfa_t
*dfa
)
46 /* Ensure at least one character fits into the buffers. */
47 if (init_len
< dfa
->mb_cur_max
)
48 init_len
= dfa
->mb_cur_max
;
49 init_buf_len
= (len
+ 1 < init_len
) ? len
+ 1: init_len
;
50 re_string_construct_common (str
, len
, pstr
, trans
, icase
, dfa
);
52 ret
= re_string_realloc_buffers (pstr
, init_buf_len
);
53 if (BE (ret
!= REG_NOERROR
, 0))
56 pstr
->word_char
= dfa
->word_char
;
57 pstr
->word_ops_used
= dfa
->word_ops_used
;
58 pstr
->mbs
= pstr
->mbs_allocated
? pstr
->mbs
: (unsigned char *) str
;
59 pstr
->valid_len
= (pstr
->mbs_allocated
|| dfa
->mb_cur_max
> 1) ? 0 : len
;
60 pstr
->valid_raw_len
= pstr
->valid_len
;
64 /* This function allocate the buffers, and initialize them. */
68 re_string_construct (re_string_t
*pstr
, const char *str
, int len
,
69 RE_TRANSLATE_TYPE trans
, int icase
, const re_dfa_t
*dfa
)
72 memset (pstr
, '\0', sizeof (re_string_t
));
73 re_string_construct_common (str
, len
, pstr
, trans
, icase
, dfa
);
77 ret
= re_string_realloc_buffers (pstr
, len
+ 1);
78 if (BE (ret
!= REG_NOERROR
, 0))
81 pstr
->mbs
= pstr
->mbs_allocated
? pstr
->mbs
: (unsigned char *) str
;
86 if (dfa
->mb_cur_max
> 1)
90 ret
= build_wcs_upper_buffer (pstr
);
91 if (BE (ret
!= REG_NOERROR
, 0))
93 if (pstr
->valid_raw_len
>= len
)
95 if (pstr
->bufs_len
> pstr
->valid_len
+ dfa
->mb_cur_max
)
97 ret
= re_string_realloc_buffers (pstr
, pstr
->bufs_len
* 2);
98 if (BE (ret
!= REG_NOERROR
, 0))
103 #endif /* RE_ENABLE_I18N */
104 build_upper_buffer (pstr
);
108 #ifdef RE_ENABLE_I18N
109 if (dfa
->mb_cur_max
> 1)
110 build_wcs_buffer (pstr
);
112 #endif /* RE_ENABLE_I18N */
115 re_string_translate_buffer (pstr
);
118 pstr
->valid_len
= pstr
->bufs_len
;
119 pstr
->valid_raw_len
= pstr
->bufs_len
;
127 /* Helper functions for re_string_allocate, and re_string_construct. */
131 re_string_realloc_buffers (re_string_t
*pstr
, int new_buf_len
)
133 #ifdef RE_ENABLE_I18N
134 if (pstr
->mb_cur_max
> 1)
136 wint_t *new_wcs
= re_realloc (pstr
->wcs
, wint_t, new_buf_len
);
137 if (BE (new_wcs
== NULL
, 0))
140 if (pstr
->offsets
!= NULL
)
142 int *new_offsets
= re_realloc (pstr
->offsets
, int, new_buf_len
);
143 if (BE (new_offsets
== NULL
, 0))
145 pstr
->offsets
= new_offsets
;
148 #endif /* RE_ENABLE_I18N */
149 if (pstr
->mbs_allocated
)
151 unsigned char *new_mbs
= re_realloc (pstr
->mbs
, unsigned char,
153 if (BE (new_mbs
== NULL
, 0))
157 pstr
->bufs_len
= new_buf_len
;
164 re_string_construct_common (const char *str
, int len
, re_string_t
*pstr
,
165 RE_TRANSLATE_TYPE trans
, int icase
,
168 pstr
->raw_mbs
= (const unsigned char *) str
;
172 pstr
->icase
= icase
? 1 : 0;
173 pstr
->mbs_allocated
= (trans
!= NULL
|| icase
);
174 pstr
->mb_cur_max
= dfa
->mb_cur_max
;
175 pstr
->is_utf8
= dfa
->is_utf8
;
176 pstr
->map_notascii
= dfa
->map_notascii
;
177 pstr
->stop
= pstr
->len
;
178 pstr
->raw_stop
= pstr
->stop
;
181 #ifdef RE_ENABLE_I18N
183 /* Build wide character buffer PSTR->WCS.
184 If the byte sequence of the string are:
185 <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
186 Then wide character buffer will be:
187 <wc1> , WEOF , <wc2> , WEOF , <wc3>
188 We use WEOF for padding, they indicate that the position isn't
189 a first byte of a multibyte character.
191 Note that this function assumes PSTR->VALID_LEN elements are already
192 built and starts from PSTR->VALID_LEN. */
196 build_wcs_buffer (re_string_t
*pstr
)
199 unsigned char buf
[MB_LEN_MAX
];
200 assert (MB_LEN_MAX
>= pstr
->mb_cur_max
);
202 unsigned char buf
[64];
205 int byte_idx
, end_idx
, remain_len
;
208 /* Build the buffers from pstr->valid_len to either pstr->len or
210 end_idx
= (pstr
->bufs_len
> pstr
->len
) ? pstr
->len
: pstr
->bufs_len
;
211 for (byte_idx
= pstr
->valid_len
; byte_idx
< end_idx
;)
216 remain_len
= end_idx
- byte_idx
;
217 prev_st
= pstr
->cur_state
;
218 /* Apply the translation if we need. */
219 if (BE (pstr
->trans
!= NULL
, 0))
223 for (i
= 0; i
< pstr
->mb_cur_max
&& i
< remain_len
; ++i
)
225 ch
= pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ byte_idx
+ i
];
226 buf
[i
] = pstr
->mbs
[byte_idx
+ i
] = pstr
->trans
[ch
];
228 p
= (const char *) buf
;
231 p
= (const char *) pstr
->raw_mbs
+ pstr
->raw_mbs_idx
+ byte_idx
;
232 mbclen
= mbrtowc (&wc
, p
, remain_len
, &pstr
->cur_state
);
233 if (BE (mbclen
== (size_t) -2, 0))
235 /* The buffer doesn't have enough space, finish to build. */
236 pstr
->cur_state
= prev_st
;
239 else if (BE (mbclen
== (size_t) -1 || mbclen
== 0, 0))
241 /* We treat these cases as a singlebyte character. */
243 wc
= (wchar_t) pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ byte_idx
];
244 if (BE (pstr
->trans
!= NULL
, 0))
245 wc
= pstr
->trans
[wc
];
246 pstr
->cur_state
= prev_st
;
249 /* Write wide character and padding. */
250 pstr
->wcs
[byte_idx
++] = wc
;
251 /* Write paddings. */
252 for (remain_len
= byte_idx
+ mbclen
- 1; byte_idx
< remain_len
;)
253 pstr
->wcs
[byte_idx
++] = WEOF
;
255 pstr
->valid_len
= byte_idx
;
256 pstr
->valid_raw_len
= byte_idx
;
259 /* Build wide character buffer PSTR->WCS like build_wcs_buffer,
260 but for REG_ICASE. */
264 build_wcs_upper_buffer (re_string_t
*pstr
)
267 int src_idx
, byte_idx
, end_idx
, remain_len
;
270 char buf
[MB_LEN_MAX
];
271 assert (MB_LEN_MAX
>= pstr
->mb_cur_max
);
276 byte_idx
= pstr
->valid_len
;
277 end_idx
= (pstr
->bufs_len
> pstr
->len
) ? pstr
->len
: pstr
->bufs_len
;
279 /* The following optimization assumes that ASCII characters can be
280 mapped to wide characters with a simple cast. */
281 if (! pstr
->map_notascii
&& pstr
->trans
== NULL
&& !pstr
->offsets_needed
)
283 while (byte_idx
< end_idx
)
287 if (isascii (pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ byte_idx
])
288 && mbsinit (&pstr
->cur_state
))
290 /* In case of a singlebyte character. */
292 = toupper (pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ byte_idx
]);
293 /* The next step uses the assumption that wchar_t is encoded
294 ASCII-safe: all ASCII values can be converted like this. */
295 pstr
->wcs
[byte_idx
] = (wchar_t) pstr
->mbs
[byte_idx
];
300 remain_len
= end_idx
- byte_idx
;
301 prev_st
= pstr
->cur_state
;
302 mbclen
= mbrtowc (&wc
,
303 ((const char *) pstr
->raw_mbs
+ pstr
->raw_mbs_idx
304 + byte_idx
), remain_len
, &pstr
->cur_state
);
305 if (BE (mbclen
+ 2 > 2, 1))
313 mbcdlen
= wcrtomb (buf
, wcu
, &prev_st
);
314 if (BE (mbclen
== mbcdlen
, 1))
315 memcpy (pstr
->mbs
+ byte_idx
, buf
, mbclen
);
323 memcpy (pstr
->mbs
+ byte_idx
,
324 pstr
->raw_mbs
+ pstr
->raw_mbs_idx
+ byte_idx
, mbclen
);
325 pstr
->wcs
[byte_idx
++] = wcu
;
326 /* Write paddings. */
327 for (remain_len
= byte_idx
+ mbclen
- 1; byte_idx
< remain_len
;)
328 pstr
->wcs
[byte_idx
++] = WEOF
;
330 else if (mbclen
== (size_t) -1 || mbclen
== 0)
332 /* It is an invalid character or '\0'. Just use the byte. */
333 int ch
= pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ byte_idx
];
334 pstr
->mbs
[byte_idx
] = ch
;
335 /* And also cast it to wide char. */
336 pstr
->wcs
[byte_idx
++] = (wchar_t) ch
;
337 if (BE (mbclen
== (size_t) -1, 0))
338 pstr
->cur_state
= prev_st
;
342 /* The buffer doesn't have enough space, finish to build. */
343 pstr
->cur_state
= prev_st
;
347 pstr
->valid_len
= byte_idx
;
348 pstr
->valid_raw_len
= byte_idx
;
352 for (src_idx
= pstr
->valid_raw_len
; byte_idx
< end_idx
;)
357 remain_len
= end_idx
- byte_idx
;
358 prev_st
= pstr
->cur_state
;
359 if (BE (pstr
->trans
!= NULL
, 0))
363 for (i
= 0; i
< pstr
->mb_cur_max
&& i
< remain_len
; ++i
)
365 ch
= pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ src_idx
+ i
];
366 buf
[i
] = pstr
->trans
[ch
];
368 p
= (const char *) buf
;
371 p
= (const char *) pstr
->raw_mbs
+ pstr
->raw_mbs_idx
+ src_idx
;
372 mbclen
= mbrtowc (&wc
, p
, remain_len
, &pstr
->cur_state
);
373 if (BE (mbclen
+ 2 > 2, 1))
381 mbcdlen
= wcrtomb ((char *) buf
, wcu
, &prev_st
);
382 if (BE (mbclen
== mbcdlen
, 1))
383 memcpy (pstr
->mbs
+ byte_idx
, buf
, mbclen
);
384 else if (mbcdlen
!= (size_t) -1)
388 if (byte_idx
+ mbcdlen
> pstr
->bufs_len
)
390 pstr
->cur_state
= prev_st
;
394 if (pstr
->offsets
== NULL
)
396 pstr
->offsets
= re_malloc (int, pstr
->bufs_len
);
398 if (pstr
->offsets
== NULL
)
401 if (!pstr
->offsets_needed
)
403 for (i
= 0; i
< (size_t) byte_idx
; ++i
)
404 pstr
->offsets
[i
] = i
;
405 pstr
->offsets_needed
= 1;
408 memcpy (pstr
->mbs
+ byte_idx
, buf
, mbcdlen
);
409 pstr
->wcs
[byte_idx
] = wcu
;
410 pstr
->offsets
[byte_idx
] = src_idx
;
411 for (i
= 1; i
< mbcdlen
; ++i
)
413 pstr
->offsets
[byte_idx
+ i
]
414 = src_idx
+ (i
< mbclen
? i
: mbclen
- 1);
415 pstr
->wcs
[byte_idx
+ i
] = WEOF
;
417 pstr
->len
+= mbcdlen
- mbclen
;
418 if (pstr
->raw_stop
> src_idx
)
419 pstr
->stop
+= mbcdlen
- mbclen
;
420 end_idx
= (pstr
->bufs_len
> pstr
->len
)
421 ? pstr
->len
: pstr
->bufs_len
;
427 memcpy (pstr
->mbs
+ byte_idx
, p
, mbclen
);
430 memcpy (pstr
->mbs
+ byte_idx
, p
, mbclen
);
432 if (BE (pstr
->offsets_needed
!= 0, 0))
435 for (i
= 0; i
< mbclen
; ++i
)
436 pstr
->offsets
[byte_idx
+ i
] = src_idx
+ i
;
440 pstr
->wcs
[byte_idx
++] = wcu
;
441 /* Write paddings. */
442 for (remain_len
= byte_idx
+ mbclen
- 1; byte_idx
< remain_len
;)
443 pstr
->wcs
[byte_idx
++] = WEOF
;
445 else if (mbclen
== (size_t) -1 || mbclen
== 0)
447 /* It is an invalid character or '\0'. Just use the byte. */
448 int ch
= pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ src_idx
];
450 if (BE (pstr
->trans
!= NULL
, 0))
451 ch
= pstr
->trans
[ch
];
452 pstr
->mbs
[byte_idx
] = ch
;
454 if (BE (pstr
->offsets_needed
!= 0, 0))
455 pstr
->offsets
[byte_idx
] = src_idx
;
458 /* And also cast it to wide char. */
459 pstr
->wcs
[byte_idx
++] = (wchar_t) ch
;
460 if (BE (mbclen
== (size_t) -1, 0))
461 pstr
->cur_state
= prev_st
;
465 /* The buffer doesn't have enough space, finish to build. */
466 pstr
->cur_state
= prev_st
;
470 pstr
->valid_len
= byte_idx
;
471 pstr
->valid_raw_len
= src_idx
;
475 /* Skip characters until the index becomes greater than NEW_RAW_IDX.
480 re_string_skip_chars (re_string_t
*pstr
, int new_raw_idx
, wint_t *last_wc
)
487 /* Skip the characters which are not necessary to check. */
488 for (rawbuf_idx
= pstr
->raw_mbs_idx
+ pstr
->valid_raw_len
;
489 rawbuf_idx
< new_raw_idx
;)
492 remain_len
= pstr
->len
- rawbuf_idx
;
493 prev_st
= pstr
->cur_state
;
494 mbclen
= mbrtowc (&wc
, (const char *) pstr
->raw_mbs
+ rawbuf_idx
,
495 remain_len
, &pstr
->cur_state
);
496 if (BE (mbclen
== (size_t) -2 || mbclen
== (size_t) -1 || mbclen
== 0, 0))
498 /* We treat these cases as a single byte character. */
499 if (mbclen
== 0 || remain_len
== 0)
502 wc
= *(unsigned char *) (pstr
->raw_mbs
+ rawbuf_idx
);
504 pstr
->cur_state
= prev_st
;
506 /* Then proceed the next character. */
507 rawbuf_idx
+= mbclen
;
509 *last_wc
= (wint_t) wc
;
512 #endif /* RE_ENABLE_I18N */
514 /* Build the buffer PSTR->MBS, and apply the translation if we need.
515 This function is used in case of REG_ICASE. */
519 build_upper_buffer (re_string_t
*pstr
)
521 int char_idx
, end_idx
;
522 end_idx
= (pstr
->bufs_len
> pstr
->len
) ? pstr
->len
: pstr
->bufs_len
;
524 for (char_idx
= pstr
->valid_len
; char_idx
< end_idx
; ++char_idx
)
526 int ch
= pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ char_idx
];
527 if (BE (pstr
->trans
!= NULL
, 0))
528 ch
= pstr
->trans
[ch
];
530 pstr
->mbs
[char_idx
] = toupper (ch
);
532 pstr
->mbs
[char_idx
] = ch
;
534 pstr
->valid_len
= char_idx
;
535 pstr
->valid_raw_len
= char_idx
;
538 /* Apply TRANS to the buffer in PSTR. */
542 re_string_translate_buffer (re_string_t
*pstr
)
544 int buf_idx
, end_idx
;
545 end_idx
= (pstr
->bufs_len
> pstr
->len
) ? pstr
->len
: pstr
->bufs_len
;
547 for (buf_idx
= pstr
->valid_len
; buf_idx
< end_idx
; ++buf_idx
)
549 int ch
= pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ buf_idx
];
550 pstr
->mbs
[buf_idx
] = pstr
->trans
[ch
];
553 pstr
->valid_len
= buf_idx
;
554 pstr
->valid_raw_len
= buf_idx
;
557 /* This function re-construct the buffers.
558 Concretely, convert to wide character in case of pstr->mb_cur_max > 1,
559 convert to upper case in case of REG_ICASE, apply translation. */
563 re_string_reconstruct (re_string_t
*pstr
, int idx
, int eflags
)
565 int offset
= idx
- pstr
->raw_mbs_idx
;
566 if (BE (offset
< 0, 0))
569 #ifdef RE_ENABLE_I18N
570 if (pstr
->mb_cur_max
> 1)
571 memset (&pstr
->cur_state
, '\0', sizeof (mbstate_t));
572 #endif /* RE_ENABLE_I18N */
573 pstr
->len
= pstr
->raw_len
;
574 pstr
->stop
= pstr
->raw_stop
;
576 pstr
->raw_mbs_idx
= 0;
577 pstr
->valid_raw_len
= 0;
578 pstr
->offsets_needed
= 0;
579 pstr
->tip_context
= ((eflags
& REG_NOTBOL
) ? CONTEXT_BEGBUF
580 : CONTEXT_NEWLINE
| CONTEXT_BEGBUF
);
581 if (!pstr
->mbs_allocated
)
582 pstr
->mbs
= (unsigned char *) pstr
->raw_mbs
;
586 if (BE (offset
!= 0, 1))
588 /* Are the characters which are already checked remain? */
589 if (BE (offset
< pstr
->valid_raw_len
, 1)
590 #ifdef RE_ENABLE_I18N
591 /* Handling this would enlarge the code too much.
592 Accept a slowdown in that case. */
593 && pstr
->offsets_needed
== 0
597 /* Yes, move them to the front of the buffer. */
598 pstr
->tip_context
= re_string_context_at (pstr
, offset
- 1, eflags
);
599 #ifdef RE_ENABLE_I18N
600 if (pstr
->mb_cur_max
> 1)
601 memmove (pstr
->wcs
, pstr
->wcs
+ offset
,
602 (pstr
->valid_len
- offset
) * sizeof (wint_t));
603 #endif /* RE_ENABLE_I18N */
604 if (BE (pstr
->mbs_allocated
, 0))
605 memmove (pstr
->mbs
, pstr
->mbs
+ offset
,
606 pstr
->valid_len
- offset
);
607 pstr
->valid_len
-= offset
;
608 pstr
->valid_raw_len
-= offset
;
610 assert (pstr
->valid_len
> 0);
615 /* No, skip all characters until IDX. */
616 #ifdef RE_ENABLE_I18N
617 if (BE (pstr
->offsets_needed
, 0))
619 pstr
->len
= pstr
->raw_len
- idx
+ offset
;
620 pstr
->stop
= pstr
->raw_stop
- idx
+ offset
;
621 pstr
->offsets_needed
= 0;
625 #ifdef RE_ENABLE_I18N
626 if (pstr
->mb_cur_max
> 1)
633 const unsigned char *raw
, *p
, *q
, *end
;
635 /* Special case UTF-8. Multi-byte chars start with any
636 byte other than 0x80 - 0xbf. */
637 raw
= pstr
->raw_mbs
+ pstr
->raw_mbs_idx
;
638 end
= raw
+ (offset
- pstr
->mb_cur_max
);
639 p
= raw
+ offset
- 1;
641 /* We know the wchar_t encoding is UCS4, so for the simple
642 case, ASCII characters, skip the conversion step. */
643 if (isascii (*p
) && BE (pstr
->trans
== NULL
, 1))
645 memset (&pstr
->cur_state
, '\0', sizeof (mbstate_t));
651 for (; p
>= end
; --p
)
652 if ((*p
& 0xc0) != 0x80)
656 int mlen
= raw
+ pstr
->len
- p
;
657 unsigned char buf
[6];
661 if (BE (pstr
->trans
!= NULL
, 0))
663 int i
= mlen
< 6 ? mlen
: 6;
665 buf
[i
] = pstr
->trans
[p
[i
]];
668 /* XXX Don't use mbrtowc, we know which conversion
669 to use (UTF-8 -> UCS4). */
670 memset (&cur_state
, 0, sizeof (cur_state
));
671 mbclen
= mbrtowc (&wc2
, (const char *) p
, mlen
,
673 if (raw
+ offset
- p
<= mbclen
674 && mbclen
< (size_t) -2)
676 memset (&pstr
->cur_state
, '\0',
678 pstr
->valid_len
= mbclen
- (raw
+ offset
- p
);
686 pstr
->valid_len
= re_string_skip_chars (pstr
, idx
, &wc
) - idx
;
689 = re_string_context_at (pstr
, pstr
->valid_raw_len
- 1, eflags
);
691 pstr
->tip_context
= ((BE (pstr
->word_ops_used
!= 0, 0)
692 && IS_WIDE_WORD_CHAR (wc
))
694 : ((IS_WIDE_NEWLINE (wc
)
695 && pstr
->newline_anchor
)
696 ? CONTEXT_NEWLINE
: 0));
697 if (BE (pstr
->valid_len
, 0))
699 for (wcs_idx
= 0; wcs_idx
< pstr
->valid_len
; ++wcs_idx
)
700 pstr
->wcs
[wcs_idx
] = WEOF
;
701 if (pstr
->mbs_allocated
)
702 memset (pstr
->mbs
, 255, pstr
->valid_len
);
704 pstr
->valid_raw_len
= pstr
->valid_len
;
707 #endif /* RE_ENABLE_I18N */
709 int c
= pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ offset
- 1];
710 pstr
->valid_raw_len
= 0;
713 pstr
->tip_context
= (bitset_contain (pstr
->word_char
, c
)
715 : ((IS_NEWLINE (c
) && pstr
->newline_anchor
)
716 ? CONTEXT_NEWLINE
: 0));
719 if (!BE (pstr
->mbs_allocated
, 0))
722 pstr
->raw_mbs_idx
= idx
;
724 pstr
->stop
-= offset
;
726 /* Then build the buffers. */
727 #ifdef RE_ENABLE_I18N
728 if (pstr
->mb_cur_max
> 1)
732 reg_errcode_t ret
= build_wcs_upper_buffer (pstr
);
733 if (BE (ret
!= REG_NOERROR
, 0))
737 build_wcs_buffer (pstr
);
740 #endif /* RE_ENABLE_I18N */
741 if (BE (pstr
->mbs_allocated
, 0))
744 build_upper_buffer (pstr
);
745 else if (pstr
->trans
!= NULL
)
746 re_string_translate_buffer (pstr
);
749 pstr
->valid_len
= pstr
->len
;
756 internal_function
__attribute ((pure
))
757 re_string_peek_byte_case (const re_string_t
*pstr
, int idx
)
761 /* Handle the common (easiest) cases first. */
762 if (BE (!pstr
->mbs_allocated
, 1))
763 return re_string_peek_byte (pstr
, idx
);
765 #ifdef RE_ENABLE_I18N
766 if (pstr
->mb_cur_max
> 1
767 && ! re_string_is_single_byte_char (pstr
, pstr
->cur_idx
+ idx
))
768 return re_string_peek_byte (pstr
, idx
);
771 off
= pstr
->cur_idx
+ idx
;
772 #ifdef RE_ENABLE_I18N
773 if (pstr
->offsets_needed
)
774 off
= pstr
->offsets
[off
];
777 ch
= pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ off
];
779 #ifdef RE_ENABLE_I18N
780 /* Ensure that e.g. for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I
781 this function returns CAPITAL LETTER I instead of first byte of
782 DOTLESS SMALL LETTER I. The latter would confuse the parser,
783 since peek_byte_case doesn't advance cur_idx in any way. */
784 if (pstr
->offsets_needed
&& !isascii (ch
))
785 return re_string_peek_byte (pstr
, idx
);
792 internal_function
__attribute ((pure
))
793 re_string_fetch_byte_case (re_string_t
*pstr
)
795 if (BE (!pstr
->mbs_allocated
, 1))
796 return re_string_fetch_byte (pstr
);
798 #ifdef RE_ENABLE_I18N
799 if (pstr
->offsets_needed
)
803 /* For tr_TR.UTF-8 [[:islower:]] there is
804 [[: CAPITAL LETTER I WITH DOT lower:]] in mbs. Skip
805 in that case the whole multi-byte character and return
806 the original letter. On the other side, with
807 [[: DOTLESS SMALL LETTER I return [[:I, as doing
808 anything else would complicate things too much. */
810 if (!re_string_first_byte (pstr
, pstr
->cur_idx
))
811 return re_string_fetch_byte (pstr
);
813 off
= pstr
->offsets
[pstr
->cur_idx
];
814 ch
= pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ off
];
817 return re_string_fetch_byte (pstr
);
819 re_string_skip_bytes (pstr
,
820 re_string_char_size_at (pstr
, pstr
->cur_idx
));
825 return pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ pstr
->cur_idx
++];
830 re_string_destruct (re_string_t
*pstr
)
832 #ifdef RE_ENABLE_I18N
834 re_free (pstr
->offsets
);
835 #endif /* RE_ENABLE_I18N */
836 if (pstr
->mbs_allocated
)
840 /* Return the context at IDX in INPUT. */
844 re_string_context_at (const re_string_t
*input
, int idx
, int eflags
)
848 /* In this case, we use the value stored in input->tip_context,
849 since we can't know the character in input->mbs[-1] here. */
850 return input
->tip_context
;
851 if (BE (idx
== input
->len
, 0))
852 return ((eflags
& REG_NOTEOL
) ? CONTEXT_ENDBUF
853 : CONTEXT_NEWLINE
| CONTEXT_ENDBUF
);
854 #ifdef RE_ENABLE_I18N
855 if (input
->mb_cur_max
> 1)
859 while(input
->wcs
[wc_idx
] == WEOF
)
862 /* It must not happen. */
863 assert (wc_idx
>= 0);
867 return input
->tip_context
;
869 wc
= input
->wcs
[wc_idx
];
870 if (BE (input
->word_ops_used
!= 0, 0) && IS_WIDE_WORD_CHAR (wc
))
872 return (IS_WIDE_NEWLINE (wc
) && input
->newline_anchor
873 ? CONTEXT_NEWLINE
: 0);
878 c
= re_string_byte_at (input
, idx
);
879 if (bitset_contain (input
->word_char
, c
))
881 return IS_NEWLINE (c
) && input
->newline_anchor
? CONTEXT_NEWLINE
: 0;
885 /* Functions for set operation. */
889 re_node_set_alloc (re_node_set
*set
, int size
)
893 set
->elems
= re_malloc (int, size
);
894 if (BE (set
->elems
== NULL
, 0))
901 re_node_set_init_1 (re_node_set
*set
, int elem
)
905 set
->elems
= re_malloc (int, 1);
906 if (BE (set
->elems
== NULL
, 0))
908 set
->alloc
= set
->nelem
= 0;
911 set
->elems
[0] = elem
;
917 re_node_set_init_2 (re_node_set
*set
, int elem1
, int elem2
)
920 set
->elems
= re_malloc (int, 2);
921 if (BE (set
->elems
== NULL
, 0))
926 set
->elems
[0] = elem1
;
933 set
->elems
[0] = elem1
;
934 set
->elems
[1] = elem2
;
938 set
->elems
[0] = elem2
;
939 set
->elems
[1] = elem1
;
947 re_node_set_init_copy (re_node_set
*dest
, const re_node_set
*src
)
949 dest
->nelem
= src
->nelem
;
952 dest
->alloc
= dest
->nelem
;
953 dest
->elems
= re_malloc (int, dest
->alloc
);
954 if (BE (dest
->elems
== NULL
, 0))
956 dest
->alloc
= dest
->nelem
= 0;
959 memcpy (dest
->elems
, src
->elems
, src
->nelem
* sizeof (int));
962 re_node_set_init_empty (dest
);
966 /* Calculate the intersection of the sets SRC1 and SRC2. And merge it to
967 DEST. Return value indicate the error code or REG_NOERROR if succeeded.
968 Note: We assume dest->elems is NULL, when dest->alloc is 0. */
972 re_node_set_add_intersect (re_node_set
*dest
, const re_node_set
*src1
,
973 const re_node_set
*src2
)
975 int i1
, i2
, is
, id
, delta
, sbase
;
976 if (src1
->nelem
== 0 || src2
->nelem
== 0)
979 /* We need dest->nelem + 2 * elems_in_intersection; this is a
980 conservative estimate. */
981 if (src1
->nelem
+ src2
->nelem
+ dest
->nelem
> dest
->alloc
)
983 int new_alloc
= src1
->nelem
+ src2
->nelem
+ dest
->alloc
;
984 int *new_elems
= re_realloc (dest
->elems
, int, new_alloc
);
985 if (BE (new_elems
== NULL
, 0))
987 dest
->elems
= new_elems
;
988 dest
->alloc
= new_alloc
;
991 /* Find the items in the intersection of SRC1 and SRC2, and copy
992 into the top of DEST those that are not already in DEST itself. */
993 sbase
= dest
->nelem
+ src1
->nelem
+ src2
->nelem
;
994 i1
= src1
->nelem
- 1;
995 i2
= src2
->nelem
- 1;
996 id
= dest
->nelem
- 1;
999 if (src1
->elems
[i1
] == src2
->elems
[i2
])
1001 /* Try to find the item in DEST. Maybe we could binary search? */
1002 while (id
>= 0 && dest
->elems
[id
] > src1
->elems
[i1
])
1005 if (id
< 0 || dest
->elems
[id
] != src1
->elems
[i1
])
1006 dest
->elems
[--sbase
] = src1
->elems
[i1
];
1008 if (--i1
< 0 || --i2
< 0)
1012 /* Lower the highest of the two items. */
1013 else if (src1
->elems
[i1
] < src2
->elems
[i2
])
1025 id
= dest
->nelem
- 1;
1026 is
= dest
->nelem
+ src1
->nelem
+ src2
->nelem
- 1;
1027 delta
= is
- sbase
+ 1;
1029 /* Now copy. When DELTA becomes zero, the remaining
1030 DEST elements are already in place; this is more or
1031 less the same loop that is in re_node_set_merge. */
1032 dest
->nelem
+= delta
;
1033 if (delta
> 0 && id
>= 0)
1036 if (dest
->elems
[is
] > dest
->elems
[id
])
1038 /* Copy from the top. */
1039 dest
->elems
[id
+ delta
--] = dest
->elems
[is
--];
1045 /* Slide from the bottom. */
1046 dest
->elems
[id
+ delta
] = dest
->elems
[id
];
1052 /* Copy remaining SRC elements. */
1053 memcpy (dest
->elems
, dest
->elems
+ sbase
, delta
* sizeof (int));
1058 /* Calculate the union set of the sets SRC1 and SRC2. And store it to
1059 DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
1061 static reg_errcode_t
1063 re_node_set_init_union (re_node_set
*dest
, const re_node_set
*src1
,
1064 const re_node_set
*src2
)
1067 if (src1
!= NULL
&& src1
->nelem
> 0 && src2
!= NULL
&& src2
->nelem
> 0)
1069 dest
->alloc
= src1
->nelem
+ src2
->nelem
;
1070 dest
->elems
= re_malloc (int, dest
->alloc
);
1071 if (BE (dest
->elems
== NULL
, 0))
1076 if (src1
!= NULL
&& src1
->nelem
> 0)
1077 return re_node_set_init_copy (dest
, src1
);
1078 else if (src2
!= NULL
&& src2
->nelem
> 0)
1079 return re_node_set_init_copy (dest
, src2
);
1081 re_node_set_init_empty (dest
);
1084 for (i1
= i2
= id
= 0 ; i1
< src1
->nelem
&& i2
< src2
->nelem
;)
1086 if (src1
->elems
[i1
] > src2
->elems
[i2
])
1088 dest
->elems
[id
++] = src2
->elems
[i2
++];
1091 if (src1
->elems
[i1
] == src2
->elems
[i2
])
1093 dest
->elems
[id
++] = src1
->elems
[i1
++];
1095 if (i1
< src1
->nelem
)
1097 memcpy (dest
->elems
+ id
, src1
->elems
+ i1
,
1098 (src1
->nelem
- i1
) * sizeof (int));
1099 id
+= src1
->nelem
- i1
;
1101 else if (i2
< src2
->nelem
)
1103 memcpy (dest
->elems
+ id
, src2
->elems
+ i2
,
1104 (src2
->nelem
- i2
) * sizeof (int));
1105 id
+= src2
->nelem
- i2
;
1111 /* Calculate the union set of the sets DEST and SRC. And store it to
1112 DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
1114 static reg_errcode_t
1116 re_node_set_merge (re_node_set
*dest
, const re_node_set
*src
)
1118 int is
, id
, sbase
, delta
;
1119 if (src
== NULL
|| src
->nelem
== 0)
1121 if (dest
->alloc
< 2 * src
->nelem
+ dest
->nelem
)
1123 int new_alloc
= 2 * (src
->nelem
+ dest
->alloc
);
1124 int *new_buffer
= re_realloc (dest
->elems
, int, new_alloc
);
1125 if (BE (new_buffer
== NULL
, 0))
1127 dest
->elems
= new_buffer
;
1128 dest
->alloc
= new_alloc
;
1131 if (BE (dest
->nelem
== 0, 0))
1133 dest
->nelem
= src
->nelem
;
1134 memcpy (dest
->elems
, src
->elems
, src
->nelem
* sizeof (int));
1138 /* Copy into the top of DEST the items of SRC that are not
1139 found in DEST. Maybe we could binary search in DEST? */
1140 for (sbase
= dest
->nelem
+ 2 * src
->nelem
,
1141 is
= src
->nelem
- 1, id
= dest
->nelem
- 1; is
>= 0 && id
>= 0; )
1143 if (dest
->elems
[id
] == src
->elems
[is
])
1145 else if (dest
->elems
[id
] < src
->elems
[is
])
1146 dest
->elems
[--sbase
] = src
->elems
[is
--];
1147 else /* if (dest->elems[id] > src->elems[is]) */
1153 /* If DEST is exhausted, the remaining items of SRC must be unique. */
1155 memcpy (dest
->elems
+ sbase
, src
->elems
, (is
+ 1) * sizeof (int));
1158 id
= dest
->nelem
- 1;
1159 is
= dest
->nelem
+ 2 * src
->nelem
- 1;
1160 delta
= is
- sbase
+ 1;
1164 /* Now copy. When DELTA becomes zero, the remaining
1165 DEST elements are already in place. */
1166 dest
->nelem
+= delta
;
1169 if (dest
->elems
[is
] > dest
->elems
[id
])
1171 /* Copy from the top. */
1172 dest
->elems
[id
+ delta
--] = dest
->elems
[is
--];
1178 /* Slide from the bottom. */
1179 dest
->elems
[id
+ delta
] = dest
->elems
[id
];
1182 /* Copy remaining SRC elements. */
1183 memcpy (dest
->elems
, dest
->elems
+ sbase
,
1184 delta
* sizeof (int));
1193 /* Insert the new element ELEM to the re_node_set* SET.
1194 SET should not already have ELEM.
1195 return -1 if an error is occured, return 1 otherwise. */
1199 re_node_set_insert (re_node_set
*set
, int elem
)
1202 /* In case the set is empty. */
1203 if (set
->alloc
== 0)
1205 if (BE (re_node_set_init_1 (set
, elem
) == REG_NOERROR
, 1))
1211 if (BE (set
->nelem
, 0) == 0)
1213 /* We already guaranteed above that set->alloc != 0. */
1214 set
->elems
[0] = elem
;
1219 /* Realloc if we need. */
1220 if (set
->alloc
== set
->nelem
)
1223 set
->alloc
= set
->alloc
* 2;
1224 new_elems
= re_realloc (set
->elems
, int, set
->alloc
);
1225 if (BE (new_elems
== NULL
, 0))
1227 set
->elems
= new_elems
;
1230 /* Move the elements which follows the new element. Test the
1231 first element separately to skip a check in the inner loop. */
1232 if (elem
< set
->elems
[0])
1235 for (idx
= set
->nelem
; idx
> 0; idx
--)
1236 set
->elems
[idx
] = set
->elems
[idx
- 1];
1240 for (idx
= set
->nelem
; set
->elems
[idx
- 1] > elem
; idx
--)
1241 set
->elems
[idx
] = set
->elems
[idx
- 1];
1244 /* Insert the new element. */
1245 set
->elems
[idx
] = elem
;
1250 /* Insert the new element ELEM to the re_node_set* SET.
1251 SET should not already have any element greater than or equal to ELEM.
1252 Return -1 if an error is occured, return 1 otherwise. */
1256 re_node_set_insert_last (re_node_set
*set
, int elem
)
1258 /* Realloc if we need. */
1259 if (set
->alloc
== set
->nelem
)
1262 set
->alloc
= (set
->alloc
+ 1) * 2;
1263 new_elems
= re_realloc (set
->elems
, int, set
->alloc
);
1264 if (BE (new_elems
== NULL
, 0))
1266 set
->elems
= new_elems
;
1269 /* Insert the new element. */
1270 set
->elems
[set
->nelem
++] = elem
;
1274 /* Compare two node sets SET1 and SET2.
1275 return 1 if SET1 and SET2 are equivalent, return 0 otherwise. */
1278 internal_function
__attribute ((pure
))
1279 re_node_set_compare (const re_node_set
*set1
, const re_node_set
*set2
)
1282 if (set1
== NULL
|| set2
== NULL
|| set1
->nelem
!= set2
->nelem
)
1284 for (i
= set1
->nelem
; --i
>= 0 ; )
1285 if (set1
->elems
[i
] != set2
->elems
[i
])
1290 /* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise. */
1293 internal_function
__attribute ((pure
))
1294 re_node_set_contains (const re_node_set
*set
, int elem
)
1296 unsigned int idx
, right
, mid
;
1297 if (set
->nelem
<= 0)
1300 /* Binary search the element. */
1302 right
= set
->nelem
- 1;
1305 mid
= (idx
+ right
) / 2;
1306 if (set
->elems
[mid
] < elem
)
1311 return set
->elems
[idx
] == elem
? idx
+ 1 : 0;
1316 re_node_set_remove_at (re_node_set
*set
, int idx
)
1318 if (idx
< 0 || idx
>= set
->nelem
)
1321 for (; idx
< set
->nelem
; idx
++)
1322 set
->elems
[idx
] = set
->elems
[idx
+ 1];
1326 /* Add the token TOKEN to dfa->nodes, and return the index of the token.
1327 Or return -1, if an error will be occured. */
1331 re_dfa_add_node (re_dfa_t
*dfa
, re_token_t token
)
1333 int type
= token
.type
;
1334 if (BE (dfa
->nodes_len
>= dfa
->nodes_alloc
, 0))
1336 size_t new_nodes_alloc
= dfa
->nodes_alloc
* 2;
1337 int *new_nexts
, *new_indices
;
1338 re_node_set
*new_edests
, *new_eclosures
;
1339 re_token_t
*new_nodes
;
1341 /* Avoid overflows. */
1342 if (BE (new_nodes_alloc
< dfa
->nodes_alloc
, 0))
1345 new_nodes
= re_realloc (dfa
->nodes
, re_token_t
, new_nodes_alloc
);
1346 if (BE (new_nodes
== NULL
, 0))
1348 dfa
->nodes
= new_nodes
;
1349 new_nexts
= re_realloc (dfa
->nexts
, int, new_nodes_alloc
);
1350 new_indices
= re_realloc (dfa
->org_indices
, int, new_nodes_alloc
);
1351 new_edests
= re_realloc (dfa
->edests
, re_node_set
, new_nodes_alloc
);
1352 new_eclosures
= re_realloc (dfa
->eclosures
, re_node_set
, new_nodes_alloc
);
1353 if (BE (new_nexts
== NULL
|| new_indices
== NULL
1354 || new_edests
== NULL
|| new_eclosures
== NULL
, 0))
1356 dfa
->nexts
= new_nexts
;
1357 dfa
->org_indices
= new_indices
;
1358 dfa
->edests
= new_edests
;
1359 dfa
->eclosures
= new_eclosures
;
1360 dfa
->nodes_alloc
= new_nodes_alloc
;
1362 dfa
->nodes
[dfa
->nodes_len
] = token
;
1363 dfa
->nodes
[dfa
->nodes_len
].constraint
= 0;
1364 #ifdef RE_ENABLE_I18N
1365 dfa
->nodes
[dfa
->nodes_len
].accept_mb
=
1366 (type
== OP_PERIOD
&& dfa
->mb_cur_max
> 1) || type
== COMPLEX_BRACKET
;
1368 dfa
->nexts
[dfa
->nodes_len
] = -1;
1369 re_node_set_init_empty (dfa
->edests
+ dfa
->nodes_len
);
1370 re_node_set_init_empty (dfa
->eclosures
+ dfa
->nodes_len
);
1371 return dfa
->nodes_len
++;
1374 static inline unsigned int
1376 calc_state_hash (const re_node_set
*nodes
, unsigned int context
)
1378 unsigned int hash
= nodes
->nelem
+ context
;
1380 for (i
= 0 ; i
< nodes
->nelem
; i
++)
1381 hash
+= nodes
->elems
[i
];
1385 /* Search for the state whose node_set is equivalent to NODES.
1386 Return the pointer to the state, if we found it in the DFA.
1387 Otherwise create the new one and return it. In case of an error
1388 return NULL and set the error code in ERR.
1389 Note: - We assume NULL as the invalid state, then it is possible that
1390 return value is NULL and ERR is REG_NOERROR.
1391 - We never return non-NULL value in case of any errors, it is for
1394 static re_dfastate_t
*
1396 re_acquire_state (reg_errcode_t
*err
, const re_dfa_t
*dfa
,
1397 const re_node_set
*nodes
)
1400 re_dfastate_t
*new_state
;
1401 struct re_state_table_entry
*spot
;
1403 if (BE (nodes
->nelem
== 0, 0))
1408 hash
= calc_state_hash (nodes
, 0);
1409 spot
= dfa
->state_table
+ (hash
& dfa
->state_hash_mask
);
1411 for (i
= 0 ; i
< spot
->num
; i
++)
1413 re_dfastate_t
*state
= spot
->array
[i
];
1414 if (hash
!= state
->hash
)
1416 if (re_node_set_compare (&state
->nodes
, nodes
))
1420 /* There are no appropriate state in the dfa, create the new one. */
1421 new_state
= create_ci_newstate (dfa
, nodes
, hash
);
1422 if (BE (new_state
== NULL
, 0))
1428 /* Search for the state whose node_set is equivalent to NODES and
1429 whose context is equivalent to CONTEXT.
1430 Return the pointer to the state, if we found it in the DFA.
1431 Otherwise create the new one and return it. In case of an error
1432 return NULL and set the error code in ERR.
1433 Note: - We assume NULL as the invalid state, then it is possible that
1434 return value is NULL and ERR is REG_NOERROR.
1435 - We never return non-NULL value in case of any errors, it is for
1438 static re_dfastate_t
*
1440 re_acquire_state_context (reg_errcode_t
*err
, const re_dfa_t
*dfa
,
1441 const re_node_set
*nodes
, unsigned int context
)
1444 re_dfastate_t
*new_state
;
1445 struct re_state_table_entry
*spot
;
1447 if (nodes
->nelem
== 0)
1452 hash
= calc_state_hash (nodes
, context
);
1453 spot
= dfa
->state_table
+ (hash
& dfa
->state_hash_mask
);
1455 for (i
= 0 ; i
< spot
->num
; i
++)
1457 re_dfastate_t
*state
= spot
->array
[i
];
1458 if (state
->hash
== hash
1459 && state
->context
== context
1460 && re_node_set_compare (state
->entrance_nodes
, nodes
))
1463 /* There are no appropriate state in `dfa', create the new one. */
1464 new_state
= create_cd_newstate (dfa
, nodes
, context
, hash
);
1465 if (BE (new_state
== NULL
, 0))
1471 /* Finish initialization of the new state NEWSTATE, and using its hash value
1472 HASH put in the appropriate bucket of DFA's state table. Return value
1473 indicates the error code if failed. */
1475 static reg_errcode_t
1476 register_state (const re_dfa_t
*dfa
, re_dfastate_t
*newstate
,
1479 struct re_state_table_entry
*spot
;
1483 newstate
->hash
= hash
;
1484 err
= re_node_set_alloc (&newstate
->non_eps_nodes
, newstate
->nodes
.nelem
);
1485 if (BE (err
!= REG_NOERROR
, 0))
1487 for (i
= 0; i
< newstate
->nodes
.nelem
; i
++)
1489 int elem
= newstate
->nodes
.elems
[i
];
1490 if (!IS_EPSILON_NODE (dfa
->nodes
[elem
].type
))
1491 re_node_set_insert_last (&newstate
->non_eps_nodes
, elem
);
1494 spot
= dfa
->state_table
+ (hash
& dfa
->state_hash_mask
);
1495 if (BE (spot
->alloc
<= spot
->num
, 0))
1497 int new_alloc
= 2 * spot
->num
+ 2;
1498 re_dfastate_t
**new_array
= re_realloc (spot
->array
, re_dfastate_t
*,
1500 if (BE (new_array
== NULL
, 0))
1502 spot
->array
= new_array
;
1503 spot
->alloc
= new_alloc
;
1505 spot
->array
[spot
->num
++] = newstate
;
1510 free_state (re_dfastate_t
*state
)
1512 re_node_set_free (&state
->non_eps_nodes
);
1513 re_node_set_free (&state
->inveclosure
);
1514 if (state
->entrance_nodes
!= &state
->nodes
)
1516 re_node_set_free (state
->entrance_nodes
);
1517 re_free (state
->entrance_nodes
);
1519 re_node_set_free (&state
->nodes
);
1520 re_free (state
->word_trtable
);
1521 re_free (state
->trtable
);
1525 /* Create the new state which is independ of contexts.
1526 Return the new state if succeeded, otherwise return NULL. */
1528 static re_dfastate_t
*
1530 create_ci_newstate (const re_dfa_t
*dfa
, const re_node_set
*nodes
,
1535 re_dfastate_t
*newstate
;
1537 newstate
= (re_dfastate_t
*) calloc (sizeof (re_dfastate_t
), 1);
1538 if (BE (newstate
== NULL
, 0))
1540 err
= re_node_set_init_copy (&newstate
->nodes
, nodes
);
1541 if (BE (err
!= REG_NOERROR
, 0))
1547 newstate
->entrance_nodes
= &newstate
->nodes
;
1548 for (i
= 0 ; i
< nodes
->nelem
; i
++)
1550 re_token_t
*node
= dfa
->nodes
+ nodes
->elems
[i
];
1551 re_token_type_t type
= node
->type
;
1552 if (type
== CHARACTER
&& !node
->constraint
)
1554 #ifdef RE_ENABLE_I18N
1555 newstate
->accept_mb
|= node
->accept_mb
;
1556 #endif /* RE_ENABLE_I18N */
1558 /* If the state has the halt node, the state is a halt state. */
1559 if (type
== END_OF_RE
)
1561 else if (type
== OP_BACK_REF
)
1562 newstate
->has_backref
= 1;
1563 else if (type
== ANCHOR
|| node
->constraint
)
1564 newstate
->has_constraint
= 1;
1566 err
= register_state (dfa
, newstate
, hash
);
1567 if (BE (err
!= REG_NOERROR
, 0))
1569 free_state (newstate
);
1575 /* Create the new state which is depend on the context CONTEXT.
1576 Return the new state if succeeded, otherwise return NULL. */
1578 static re_dfastate_t
*
1580 create_cd_newstate (const re_dfa_t
*dfa
, const re_node_set
*nodes
,
1581 unsigned int context
, unsigned int hash
)
1583 int i
, nctx_nodes
= 0;
1585 re_dfastate_t
*newstate
;
1587 newstate
= (re_dfastate_t
*) calloc (sizeof (re_dfastate_t
), 1);
1588 if (BE (newstate
== NULL
, 0))
1590 err
= re_node_set_init_copy (&newstate
->nodes
, nodes
);
1591 if (BE (err
!= REG_NOERROR
, 0))
1597 newstate
->context
= context
;
1598 newstate
->entrance_nodes
= &newstate
->nodes
;
1600 for (i
= 0 ; i
< nodes
->nelem
; i
++)
1602 unsigned int constraint
= 0;
1603 re_token_t
*node
= dfa
->nodes
+ nodes
->elems
[i
];
1604 re_token_type_t type
= node
->type
;
1605 if (node
->constraint
)
1606 constraint
= node
->constraint
;
1608 if (type
== CHARACTER
&& !constraint
)
1610 #ifdef RE_ENABLE_I18N
1611 newstate
->accept_mb
|= node
->accept_mb
;
1612 #endif /* RE_ENABLE_I18N */
1614 /* If the state has the halt node, the state is a halt state. */
1615 if (type
== END_OF_RE
)
1617 else if (type
== OP_BACK_REF
)
1618 newstate
->has_backref
= 1;
1619 else if (type
== ANCHOR
)
1620 constraint
= node
->opr
.ctx_type
;
1624 if (newstate
->entrance_nodes
== &newstate
->nodes
)
1626 newstate
->entrance_nodes
= re_malloc (re_node_set
, 1);
1627 if (BE (newstate
->entrance_nodes
== NULL
, 0))
1629 free_state (newstate
);
1632 re_node_set_init_copy (newstate
->entrance_nodes
, nodes
);
1634 newstate
->has_constraint
= 1;
1637 if (NOT_SATISFY_PREV_CONSTRAINT (constraint
,context
))
1639 re_node_set_remove_at (&newstate
->nodes
, i
- nctx_nodes
);
1644 err
= register_state (dfa
, newstate
, hash
);
1645 if (BE (err
!= REG_NOERROR
, 0))
1647 free_state (newstate
);